#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#   Midterm Projection (Years 1-5) using CESM-DPLE precip, temp with KNN block bootstraping of observed flows  #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Version 1.1        
# 18 June 2020 
# 
# ~~~~ Updates ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
# Functionalized redundant code
# Weighted Euclidean distance calculation with correlation between covariate and flow
# Using ensemble mean for each LE covariate, instead of quantiles
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #

#### Author: David Woodson

#### CLEAR MEMORY
rm(list=ls())

# #packages
# library(dplyr)
# #library(tidyr)
# library(lubridate)
# library(ggplot2)
# library(data.table)
# library(ggrepel)
# library(RColorBrewer)
# library(readxl)
# library(hydroGOF)
# library(kknn)
# library(plyr)
# library(qpcR)
# library(easyVerification)
# library(verification)
# library(reshape)
# library(stringr)
# library(ggpubr)
# library(randomForest)
# library(tidytext)
# library(DescTools)
# library(viridis)
# library(scatterplot3d)

#### SOURCE LIBRARIES (suppress package loading messages) AND SET WORKING DIRECTORY
mainDir="K:/My Drive/Phd Research/CRB Midterm Temperature Perturbed Predictions/Code/Phase 1/years1-10"

setwd(mainDir)
suppressPackageStartupMessages(source("Lib_HW1_orig.R"))
## Warning: package 'leaps' was built under R version 3.6.3
## Warning: package 'KernSmooth' was built under R version 3.6.3
## Warning: package 'akima' was built under R version 3.6.3
## Warning: package 'fields' was built under R version 3.6.3
## Warning: package 'spam' was built under R version 3.6.3
## Warning: package 'mgcv' was built under R version 3.6.3
## Warning: package 'nlme' was built under R version 3.6.3
## Warning: package 'locfit' was built under R version 3.6.3
## Warning: package 'MASS' was built under R version 3.6.3
## Warning: package 'prodlim' was built under R version 3.6.3
## Warning: package 'rgdal' was built under R version 3.6.3
## Warning: package 'sp' was built under R version 3.6.3
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'reshape2' was built under R version 3.6.3
## Warning: package 'scales' was built under R version 3.6.3
source("Lib_HW1_orig.R")
suppressPackageStartupMessages(source("Lib_HW2.R"))
## Warning: package 'VGAM' was built under R version 3.6.3
## Warning: package 'nnet' was built under R version 3.6.3
## Warning: package 'boot' was built under R version 3.6.3
## Warning: package 'dtw' was built under R version 3.6.3
## Warning: package 'proxy' was built under R version 3.6.3
## Warning: package 'spBayes' was built under R version 3.6.3
## Warning: package 'geoR' was built under R version 3.6.3
## Warning: package 'mclust' was built under R version 3.6.3
## Warning: package 'kohonen' was built under R version 3.6.3
source("Lib_HW2.R")


#### Clear the R console
cat("\f")
lib = "K:/My Drive/Phd Research/CRB Midterm Temperature Perturbed Predictions/Code/Phase 1/midTerm_forecasts/midTermForecast_Library_v1.4.R"

suppressPackageStartupMessages(source(lib))
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'lubridate' was built under R version 3.6.3
## Warning: package 'data.table' was built under R version 3.6.3
## Warning: package 'ggrepel' was built under R version 3.6.3
## Warning: package 'hydroGOF' was built under R version 3.6.3
## Warning: package 'zoo' was built under R version 3.6.3
## Warning: package 'plyr' was built under R version 3.6.3
## Warning: package 'qpcR' was built under R version 3.6.2
## Warning: package 'minpack.lm' was built under R version 3.6.2
## Warning: package 'rgl' was built under R version 3.6.3
## Warning: package 'robustbase' was built under R version 3.6.3
## Warning: package 'Matrix' was built under R version 3.6.3
## Warning: package 'easyVerification' was built under R version 3.6.2
## Warning: package 'SpecsVerification' was built under R version 3.6.3
## Warning: package 'ggpubr' was built under R version 3.6.3
## Warning: package 'tidytext' was built under R version 3.6.3
## Warning: package 'DescTools' was built under R version 3.6.3
## Warning: package 'viridis' was built under R version 3.6.3
## Warning: package 'xts' was built under R version 3.6.3
## Warning: package 'ggridges' was built under R version 3.6.3
source(lib)
# meanLengths = c(3,5,7)
meanLengths = 2:10
n = length(meanLengths)
fcsts = vector("list", n)

meanLength = 3
fcstMode = 'kFold'

for(i in meanLengths){
  
  cat("\n")
  cat(paste0("Mean length: ", i, "years"))
  cat("\n")
  
  fcsts[[i]] = midTermProjection(meanLength = i, fcstMode = 'kFold')
  
  cat("\n")
  cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
  cat("\n")
  
}
## 
## Mean length: 2years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 46.8571428571428
## 
## CESM-DPLE, RF Forecast DI = 56.5714285714285
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ESP DI = 65.4285714285714

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 3years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 79.4117647058823
## 
## CESM-DPLE, RF Forecast DI = 68.8235294117647
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ESP DI = 60

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 4years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 56.3636363636364
## 
## CESM-DPLE, RF Forecast DI = 80.6060606060606
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ESP DI = 61.2121212121211

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 5years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 78.75
## 
## CESM-DPLE, RF Forecast DI = 68.75
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ESP DI = 49.6875

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 6years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 75.8064516129032
## 
## CESM-DPLE, RF Forecast DI = 85.8064516129033
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 7years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 77.3333333333334
## 
## CESM-DPLE, RF Forecast DI = 71.9999999999999
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 8years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 75.5172413793104
## 
## CESM-DPLE, RF Forecast DI = 80.3448275862069
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 9years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 82.1428571428571
## 
## CESM-DPLE, RF Forecast DI = 98.5714285714286
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 10years
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## 
## CESM-DPLE, KNN Forecast DI = 124.074074074074
## 
## CESM-DPLE, RF Forecast DI = 128.148148148148
## Using flowTercile, tempTercile, startYear as id variables
## Using flowTercile, tempTercile, startYear as id variables

## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstSkillScores = NULL
fcstReliability = NULL
fcstReliabilityScores = NULL

for(i in meanLengths){
  
  cat("\n")
  cat(paste0("Mean length: ", i, "years"))
  cat("\n")
  
  #skill scores
  fcsts[[i]][[2]]$meanLength = i
  fcstSkillScores = bind_rows(fcstSkillScores, fcsts[[i]][[2]])
  
  #reliability plot data (probabilities)
  fcstReliability = bind_rows(fcstReliability, fcsts[[i]][[5]])
  
  #reliability scores
  fcstReliabilityScores = bind_rows(fcstReliabilityScores, fcsts[[i]][[6]])

  cat("\n")
  cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
  cat("\n")
  
  
}
## 
## Mean length: 2years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 3years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 4years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 5years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 6years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 7years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 8years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 9years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 10years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstSkillScores$meanLength = factor(fcstSkillScores$meanLength)
fcstSkillScores$variable = as.character(fcstSkillScores$variable)
fcstSkillScores$plotLabel = paste0("Mean Length (N) = ", fcstSkillScores$meanLength, "-years")

# fcstReliability$meanLength = factor(fcstReliability$meanLength)
fcstReliability$plotLabel = paste0("Mean Length (N) = ", fcstReliability$meanLength, "-years")

save forecasts and skillscores

saveRDS(fcsts, "RF_forecasts_v2.07_blindKFold.Rds")
# test = readRDS("RF_forecasts_v2.07_blindKFold.Rds")

saveRDS(fcstSkillScores, "RF_forecasts_skillscores_v2.07_blindKFold.Rds")

read forecasts and skillscores

fcsts = readRDS("RF_forecasts_v2.07_blindKFold.Rds")
# test = readRDS("RF_forecasts_v2.07_blindKFold.Rds")

fcstSkillScores = readRDS("RF_forecasts_skillscores_v2.07_blindKFold.Rds")

plot skill scores

plot1 = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, fill = model)) + 
      geom_boxplot() +  theme_bw() +
      #coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
      geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
      #ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
      theme(axis.title = element_text(face = "bold"), text=element_text(size=20)) +
      ylab("RPSS")
    
    print(plot1)

    ggsave("rpss_flowTerciles_AllMeanLengths_blind_RFonly.png", plot = print(plot1), device = "png", width = 10, height = 5, dpi = 450, units = "in")
    
 plot2 = ggplot(data = subset(fcstSkillScores, variable == "CRPSS" & model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, fill = model)) + 
      geom_boxplot() +  theme_bw() +
      #coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
      geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
      #ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
      theme(axis.title = element_text(face = "bold"), text=element_text(size=20)) +
      ylab("CRPSS")
    
    print(plot2)

    ggsave("crpss_flowTerciles_AllMeanLengths_blind_RFonly.png", plot = print(plot2), device = "png", width = 10, height = 5, dpi = 450, units = "in")
```r
  #save boxplot quartiles
d1 = ggplot_build(plot1)$data
print(d1[[1]])
```

```
##       fill       ymin      lower       middle      upper      ymax
## 1  #F8766D -1.8040665 -0.4389345 -0.054902154  0.6398075 0.9478514
## 2  #00BFC4 -0.9753086 -0.4711742  0.003227384  0.2709304 0.7041867
## 3  #F8766D -1.0427675 -0.1699043  0.408646823  0.6114730 0.9803672
## 4  #00BFC4 -0.8916716 -0.2857795 -0.152333927  0.3663306 0.8190696
## 5  #F8766D -1.5911608 -0.2475288  0.241560091  0.6673449 0.9366981
## 6  #00BFC4 -0.8337592 -0.3116778 -0.150561798  0.2438563 0.7421780
## 7  #F8766D -1.7175887 -0.7868249  0.303927773  0.5527396 0.9715077
## 8  #00BFC4 -1.3769778 -0.6019413 -0.280336238  0.3757278 0.7924998
## 9  #F8766D -1.6476009 -0.3881080  0.328427356  0.6562341 0.9739151
## 10 #F8766D -1.4600757 -0.4008399  0.208940898  0.6822097 0.9638551
## 11 #F8766D -2.3341885 -0.7918139 -0.025508069  0.6390359 0.9999775
## 12 #F8766D -2.1273847 -0.8454972  0.146380523  0.5836548 1.0000000
## 13 #F8766D -3.2214716 -1.7562973 -0.703087347 -0.1081327 1.0000000
##                           outliers   notchupper  notchlower      x flipped_aes
## 1                                   0.233196108 -0.34300042 0.8125       FALSE
## 2                                   0.201420326 -0.19496556 1.1875       FALSE
## 3  -1.752894, -1.974865, -1.448036  0.620374864  0.19691878 1.8125       FALSE
## 4                                   0.024366882 -0.32903473 2.1875       FALSE
## 5                        -1.784775  0.493189445 -0.01006926 2.8125       FALSE
## 6                                   0.002233805 -0.30335740 3.1875       FALSE
## 7                                   0.678077747 -0.07022220 3.8125       FALSE
## 8                                  -0.007266238 -0.55340624 4.1875       FALSE
## 9                                   0.624786967  0.03206774 5.0000       FALSE
## 10                                  0.521365195 -0.10348340 6.0000       FALSE
## 11                                  0.394301334 -0.44531747 7.0000       FALSE
## 12                       -3.481195  0.573113790 -0.28035274 8.0000       FALSE
## 13                                 -0.201928059 -1.20424663 9.0000       FALSE
##    PANEL group ymin_final ymax_final    xmin    xmax weight colour size alpha
## 1      1     1 -1.8040665  0.9478514 0.64375 0.98125      1 grey20  0.5    NA
## 2      1     2 -0.9753086  0.7041867 1.01875 1.35625      1 grey20  0.5    NA
## 3      1     3 -1.9748651  0.9803672 1.64375 1.98125      1 grey20  0.5    NA
## 4      1     4 -0.8916716  0.8190696 2.01875 2.35625      1 grey20  0.5    NA
## 5      1     5 -1.7847748  0.9366981 2.64375 2.98125      1 grey20  0.5    NA
## 6      1     6 -0.8337592  0.7421780 3.01875 3.35625      1 grey20  0.5    NA
## 7      1     7 -1.7175887  0.9715077 3.64375 3.98125      1 grey20  0.5    NA
## 8      1     8 -1.3769778  0.7924998 4.01875 4.35625      1 grey20  0.5    NA
## 9      1     9 -1.6476009  0.9739151 4.66250 5.33750      1 grey20  0.5    NA
## 10     1    10 -1.4600757  0.9638551 5.66250 6.33750      1 grey20  0.5    NA
## 11     1    11 -2.3341885  0.9999775 6.66250 7.33750      1 grey20  0.5    NA
## 12     1    12 -3.4811948  1.0000000 7.66250 8.33750      1 grey20  0.5    NA
## 13     1    13 -3.2214716  1.0000000 8.66250 9.33750      1 grey20  0.5    NA
##    shape linetype
## 1     19    solid
## 2     19    solid
## 3     19    solid
## 4     19    solid
## 5     19    solid
## 6     19    solid
## 7     19    solid
## 8     19    solid
## 9     19    solid
## 10    19    solid
## 11    19    solid
## 12    19    solid
## 13    19    solid
```

```r
#write.csv(d5, paste0(path, fcstType, "-", reservoir, "-", meanLength, "yr_SeptemberOnlyRMSEbyEns_boxplotSummary.xslx"), sep = ",")
saveRDS(d1, "rpss_flowTerciles_AllMeanLengths_blind_RFonly_boxplotSummary.rds")

dat1 = d1[[1]]

y1 = (dat1$middle[1] - dat1$middle[2])/dat1$middle[2]
```

plot reliability

  plot3 = ggplot(data = subset(fcstReliabilityScores, model!= "CESM-DPLE, KNN"), aes(x = factor(meanLength), y = V1, color = model)) + 
      geom_point() +  theme_bw() +
      #coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
      geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
      #ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
      theme(axis.title = element_text(face = "bold")) +
      ylab("Reliability Score") 
    
    print(plot3)

    ggsave("reliabilityScore_AllMeanLengths_blind_RFonly.png", plot = print(plot3), device = "png", dpi = 450, units = "in")
## Saving 7 x 5 in image
fcstRMSEs = NULL

for(i in meanLengths){
  
  cat("\n")
  cat(paste0("Mean length: ", i, "years"))
  cat("\n")
  
  rmse.i = melt.list(fcsts[[i]][[3]])
  rmse.i$meanLength = i
  
  fcstRMSEs = bind_rows(fcstRMSEs, rmse.i)

  cat("\n")
  cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
  cat("\n")
  
  
}
## 
## Mean length: 2years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 3years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 4years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 5years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 6years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 7years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 8years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 9years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 10years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstRMSEs$meanLength = factor(fcstRMSEs$meanLength)
colnames(fcstRMSEs)[2] = "model"
 plot = ggplot(data = subset(fcstRMSEs, model != "CESM-DPLE, KNN"), aes(x = meanLength, y = value, shape = model)) + 
      geom_point() +  theme_bw() +
      #coord_cartesian(ylim = c(min(crpss.melt$CRPSS), 1)) +
      geom_hline(yintercept = 0) + xlab(paste0("Mean Length (years)")) +
      #ggtitle(paste0("CRPSS for average-, high-, and low- flow years\n", vl, " different 5-year blocks")) +
      theme(axis.title = element_text(face = "bold")) +
      ylab("RMSE (MAF)")
    
    print(plot)

    ggsave("RMSEs_AllMeanLengths_blind_RFonly.png", plot = print(plot), device = "png", width = 10, height = 5, dpi = 450, units = "in")
fcstEnsembles = NULL
clim.Nyrs = matrix(NA, nrow = length(meanLengths), ncol = 2)
clim.Nyrs[,1] = meanLengths

i = 2
for(i in meanLengths){
  
  cat("\n")
  cat(paste0("Mean length: ", i, "years"))
  cat("\n")
  
  fcst.i = fcsts[[i]][[1]]$NyrMeanFlowBootstrap
  fcst.i$meanLength = i
  
  fcstEnsembles = bind_rows(fcstEnsembles, fcst.i)

  clim.Nyrs[i-1, 2] = as.numeric(fcsts[[i]][[4]])
  
  
  cat("\n")
  cat("~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~")
  cat("\n")
  
  
}
## 
## Mean length: 2years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 3years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 4years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 5years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 6years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 7years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 8years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 9years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
## 
## Mean length: 10years
## 
## ~~~~~~~~~~~~~~~~~~~~ Finished, start next mean length ~~~~~~~~~~~~~~~~~~~~
fcstEnsembles$meanLength = factor(fcstEnsembles$meanLength)

colnames(clim.Nyrs) = c("meanLength", "clim.Nyr.i")
clim.Nyrs = data.frame(clim.Nyrs)
clim.Nyrs$meanLength = factor(clim.Nyrs$meanLength)
fcstEnsembles$plotLabel = paste0("Mean Length (N) = ", fcstEnsembles$meanLength, "-years")
  plot = ggplot(subset(fcstEnsembles, covariate != "CESM-DPLE, KNN"), aes(x = startYear, y = q.f_maf, group = interaction(startYear, covariate), fill = covariate)) + 
    geom_boxplot() + labs(colour = "Covariates & Model") + theme_bw() +
    geom_point(aes(x = startYear, y = obsFlow), color = "red", stroke = 1, pch = 3) +
    # geom_crossbar(data = subset(fcstEnsembles, covariate != "CESM-DPLE, KNN"), aes(x = startYear, y = obsFlow, ymin=obsFlow, ymax=obsFlow), position=position_dodge(), color="blue", fatten = 1.5) + 
    xlab("Year") + ylab("N-year mean flow (MAF)") +
    theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
    geom_hline(data = clim.Nyrs, aes(yintercept = clim.Nyr.i), col = "black", linetype="dashed") +
    scale_x_continuous(breaks=seq(1980, 2010, 10)) +  
    scale_fill_manual(values = "lightblue") +
    #scale_linetype_manual(name = "Climatology", values = 1, guide = guide_legend(override.aes = list(color = c("black")))) +
    facet_wrap(~meanLength, scales = "free", shrink = F)

    print(plot)

    ggsave("fcstEnsembles_AllMeanLengths_blind_RFonly.png", plot = print(plot), device = "png", width = 15, height = 10, dpi = 450, units = "in")
  plot = ggplot(subset(fcstEnsembles, covariate != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(x = startYear, y = q.f_maf, group = interaction(startYear, covariate), fill = covariate)) + 
    geom_boxplot() + labs(fill = "Covariates & Model") + theme_bw() +
    geom_point(aes(x = startYear, y = obsFlow), color = "red", stroke = 2, pch = 3) +
    # geom_crossbar(data = subset(fcstEnsembles, covariate != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(x = startYear, y = obsFlow, ymin=obsFlow, ymax=obsFlow), position=position_dodge(), color="red", fatten = 2) + 
    xlab("Year") + ylab("N-year mean flow (MAF)") +
    theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
    geom_hline(data = clim.Nyrs, aes(yintercept = clim.Nyr.i), col = "black", linetype="dashed", lwd = 0.6) +
    scale_x_continuous(breaks=seq(1980, 2010, 5)) +  
    scale_fill_manual(values = "lightblue") +
    #scale_linetype_manual(name = "Climatology", values = 1, guide = guide_legend(override.aes = list(color = c("black")))) +
    facet_wrap(~plotLabel, scales = "free", shrink = F, ncol = 1)

    print(plot)

    ggsave("fcstEnsembles_AllMeanLengths_blind_RFonly_limited.png", plot = print(plot), device = "png", width = 15, height = 10, dpi = 450, units = "in")
plot <- ggplot(subset(fcstReliability, Model != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7)), aes(pred, obs_freq, color = Model)) +
    geom_vline(xintercept = 0, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_vline(xintercept = 0.2, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_vline(xintercept = 0.4, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_vline(xintercept = 0.6, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_vline(xintercept = 0.8, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_vline(xintercept = 1, linetype = 'dashed', colour = 'grey20', size = 0.5) +
    geom_point() +
    theme_bw() +
    xlab('Forecast Probability') +
    ylab('Observed Frequency') +
    scale_x_continuous(expand = c(0.0,0.0), breaks = seq(0,1,0.2), minor_breaks = seq(0,1,0.1), limits = c(-0.02,1.02))+
    scale_y_continuous(expand = c(0.0,0.0), breaks = seq(0,1,0.2), limits = c(-0.02,1.02)) +
    theme(panel.grid.minor = element_line(size = 0.02), 
          panel.grid.major = element_line(color = 'grey65', size = 0.5),
          axis.title = element_text(face = "bold")) +
    coord_fixed() + 
    theme(axis.title = element_text(face = "bold"), text=element_text(size=25)) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
    labs(color = "Model") +
    facet_wrap(~plotLabel, shrink = F, nrow = 1) +
    theme(panel.spacing.x = unit(0.4, "in"))
  # gg_circle(r=0.02, xc=fcst_prob, yc=obs_freq, color = 'red')
  
  print(plot)

  ggsave("fcstReliabilityDiagrams_limited_blind_RFonly.png", plot = print(plot), device = "png", width = 15, height = 7.5, dpi = 450, units = "in")
 p1b = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN")) + 
      geom_line(mapping = aes(x = startYear, y = value, linetype = model)) +
      geom_point(mapping = aes(x = startYear, y = value, color = flowTercile)) +
      theme_bw() +
      #geom_boxplot(aes(fill = covariate)) + 
      geom_hline(yintercept = 0) + xlab(paste0("Starting year of projected N-year mean flow")) +
      theme(axis.title = element_text(face = "bold"), axis.text.x = element_text(), text=element_text(size=25)) +
      ylab("RPSS") +
      guides(color=guide_legend("Flow Tercile")) +
    #scale_x_discrete(breaks=seq(min(as.numeric(as.character(crpss.melt$startYear))), max(as.numeric(as.character(crpss.melt$startYear))), 10))
      facet_wrap(~meanLength, scales = "free", shrink = F)

    print(p1b)

    ggsave("rpss_skillTS_AllMeanLengths_blind_RFonly.png", plot = print(p1b), device = "png", width = 15, height = 10, dpi = 450, units = "in")
 p1c = ggplot(data = subset(fcstSkillScores, variable == "RPSS" & model != "CESM-DPLE, KNN" & (meanLength == 3 | meanLength == 5 |meanLength == 7))) + 
      geom_line(mapping = aes(x = startYear, y = value, linetype = model), size = 1.1) +
      geom_point(mapping = aes(x = startYear, y = value, color = flowTercile), size = 3.5) +
      theme_bw() +
      #geom_boxplot(aes(fill = covariate)) + 
      geom_hline(yintercept = 0) + xlab(paste0("Starting year of projected N-year mean flow")) +
      theme(axis.title = element_text(face = "bold"), axis.text.x = element_text(), text=element_text(size=25)) +
      ylab("RPSS") +
      scale_x_continuous(breaks=seq(1980, 2015, 5)) +  
      guides(color=guide_legend("Flow Tercile")) +
    #scale_x_discrete(breaks=seq(min(as.numeric(as.character(crpss.melt$startYear))), max(as.numeric(as.character(crpss.melt$startYear))), 10))
      facet_wrap(~meanLength, scales = "free", shrink = F, ncol = 1)

    print(p1c)

    ggsave("rpss_skillTS_AllMeanLengths_blind_RFonly_limited.png", plot = print(p1c), device = "png", width = 15, height = 10, dpi = 450, units = "in")

Archived code